[1] "Player" "Salary" "NBA_Country"
[4] "NBA_DraftNumber" "Age" "Tm"
[7] "G" "MP" "PER"
[10] "TS." "X3PAr" "FTr"
[13] "ORB." "DRB." "TRB."
[16] "AST." "STL." "BLK."
[19] "TOV." "USG." "OWS"
[22] "DWS" "WS" "WS.48"
[25] "OBPM" "DBPM" "BPM"
[28] "VORP"
[1] "player" "salary" "nba_country"
[4] "nba_draft_number" "age" "tm"
[7] "g" "mp" "per"
[10] "ts" "x3p_ar" "f_tr"
[13] "orb" "drb" "trb"
[16] "ast" "stl" "blk"
[19] "tov" "usg" "ows"
[22] "dws" "ws" "ws_48"
[25] "obpm" "dbpm" "bpm"
[28] "vorp"
── Data Summary ────────────────────────
Values
Name raw_data
Number of rows 485
Number of columns 28
_______________________
Column type frequency:
factor 3
numeric 25
________________________
Group variables None
── Variable type: factor ────────────────────────────────────────
skim_variable n_missing complete_rate ordered n_unique
1 player 0 1 FALSE 483
2 nba_country 0 1 FALSE 44
3 tm 0 1 FALSE 31
top_counts
1 Kay: 3, Aar: 1, Aar: 1, Aar: 1
2 USA: 374, Can: 12, Fra: 9, Aus: 8
3 TOT: 55, DAL: 18, MEM: 17, UTA: 17
── Variable type: numeric ───────────────────────────────────────
skim_variable n_missing complete_rate mean
1 salary 0 1 6636507.
2 nba_draft_number 0 1 29.5
3 age 0 1 26.3
4 g 0 1 50.2
5 mp 0 1 1154.
6 per 0 1 13.3
7 ts 2 0.996 0.535
8 x3p_ar 2 0.996 0.337
9 f_tr 2 0.996 0.263
10 orb 0 1 4.87
11 drb 0 1 15.0
12 trb 0 1 9.91
13 ast 0 1 12.9
14 stl 0 1 1.53
15 blk 0 1 1.71
16 tov 2 0.996 13.1
17 usg 0 1 18.9
18 ows 0 1 1.28
19 dws 0 1 1.18
20 ws 0 1 2.46
21 ws_48 0 1 0.0800
22 obpm 0 1 -1.27
23 dbpm 0 1 -0.489
24 bpm 0 1 -1.76
25 vorp 0 1 0.599
sd p0 p25 p50 p75
1 7392602. 46080 1471382 3202217 10000000
2 21.1 1 11 25 47
3 4.27 19 23 26 29
4 24.9 1 29 59 71
5 811. 1 381 1134 1819
6 8.77 -41.1 9.8 13.2 16.5
7 0.112 0 0.506 0.545 0.582
8 0.227 0 0.167 0.346 0.481
9 0.295 0 0.155 0.231 0.320
10 4.58 0 1.8 3.2 7
11 6.85 0 10.2 14 18.8
12 4.96 0 6.2 8.7 13.3
13 9.11 0 6.9 9.9 17.6
14 0.990 0 1 1.5 1.9
15 1.68 0 0.6 1.2 2.2
16 6.12 0 9.9 12.5 15.8
17 5.94 0 15 17.9 22.2
18 1.88 -2.3 0 0.8 2
19 1.03 0 0.3 1 1.8
20 2.67 -1.2 0.3 1.8 3.6
21 0.163 -1.06 0.04 0.083 0.123
22 5.03 -36.5 -2.7 -1.1 0.4
23 2.39 -14.3 -1.7 -0.4 1
24 5.66 -49.2 -3.6 -1.3 0.5
25 1.25 -1.3 -0.1 0.1 0.9
p100 hist
1 34682550 ▇▂▁▁▁
2 62 ▇▆▃▃▆
3 41 ▇▇▆▂▁
4 79 ▃▂▂▃▇
5 2898 ▇▅▆▅▂
6 134. ▁▇▁▁▁
7 1.5 ▁▇▂▁▁
8 1 ▇▇▇▂▁
9 5.33 ▇▁▁▁▁
10 35.9 ▇▂▁▁▁
11 37.6 ▂▇▅▂▁
12 26.5 ▂▇▃▂▁
13 49.4 ▇▅▂▁▁
14 12.5 ▇▁▁▁▁
15 13.4 ▇▂▁▁▁
16 66.7 ▇▆▁▁▁
17 45.1 ▁▇▆▁▁
18 11.4 ▇▇▂▁▁
19 5.6 ▇▅▂▁▁
20 15 ▇▅▁▁▁
21 2.71 ▁▇▁▁▁
22 68.7 ▁▇▁▁▁
23 6.8 ▁▁▃▇▁
24 54.4 ▁▁▇▁▁
25 8.6 ▇▃▁▁▁
# Remove duplicate players: keep a single row per `player` value, retaining
# all other columns of the row that is kept.
raw_data <- raw_data %>%
  distinct(player, .keep_all = TRUE)
# Remove every row that contains at least one NA.
raw_data <- raw_data %>%
  drop_na()
# Summarise the cleaned data
skim(raw_data)
── Data Summary ────────────────────────
Values
Name raw_data
Number of rows 481
Number of columns 28
_______________________
Column type frequency:
factor 3
numeric 25
________________________
Group variables None
── Variable type: factor ────────────────────────────────────────
skim_variable n_missing complete_rate ordered n_unique
1 player 0 1 FALSE 481
2 nba_country 0 1 FALSE 44
3 tm 0 1 FALSE 31
top_counts
1 Aar: 1, Aar: 1, Aar: 1, Abd: 1
2 USA: 370, Can: 12, Fra: 9, Aus: 8
3 TOT: 54, DAL: 18, MEM: 17, UTA: 17
── Variable type: numeric ───────────────────────────────────────
skim_variable n_missing complete_rate mean
1 salary 0 1 6682859.
2 nba_draft_number 0 1 29.3
3 age 0 1 26.3
4 g 0 1 50.5
5 mp 0 1 1163.
6 per 0 1 13.4
7 ts 0 1 0.536
8 x3p_ar 0 1 0.338
9 f_tr 0 1 0.264
10 orb 0 1 4.91
11 drb 0 1 15.0
12 trb 0 1 9.97
13 ast 0 1 13.0
14 stl 0 1 1.54
15 blk 0 1 1.72
16 tov 0 1 13.1
17 usg 0 1 18.9
18 ows 0 1 1.29
19 dws 0 1 1.19
20 ws 0 1 2.48
21 ws_48 0 1 0.0814
22 obpm 0 1 -1.22
23 dbpm 0 1 -0.477
24 bpm 0 1 -1.7
25 vorp 0 1 0.605
sd p0 p25 p50 p75
1 7405536. 46080 1471382 3290000 10000000
2 21.1 1 10 24 47
3 4.27 19 23 26 29
4 24.7 1 30 59 71
5 809. 1 391 1155 1830
6 8.74 -41.1 9.9 13.3 16.6
7 0.112 0 0.506 0.545 0.583
8 0.227 0 0.167 0.346 0.482
9 0.295 0 0.155 0.231 0.32
10 4.58 0 1.8 3.3 7.1
11 6.80 0 10.3 14 18.8
12 4.93 0 6.2 8.7 13.3
13 9.09 0 6.9 9.9 17.2
14 0.988 0 1 1.5 1.9
15 1.69 0 0.6 1.2 2.2
16 6.12 0 9.9 12.5 15.6
17 5.81 5.7 15 17.9 22.2
18 1.88 -2.3 0 0.8 2
19 1.03 0 0.3 1 1.8
20 2.67 -1.2 0.4 1.9 3.6
21 0.163 -1.06 0.042 0.083 0.123
22 5.02 -36.5 -2.6 -1 0.4
23 2.39 -14.3 -1.6 -0.4 1
24 5.64 -49.2 -3.5 -1.2 0.6
25 1.25 -1.3 -0.1 0.1 0.9
p100 hist
1 34682550 ▇▂▁▁▁
2 62 ▇▅▃▃▆
3 41 ▇▇▆▂▁
4 79 ▃▂▂▃▇
5 2898 ▇▅▆▅▂
6 134. ▁▇▁▁▁
7 1.5 ▁▇▂▁▁
8 1 ▇▇▇▂▁
9 5.33 ▇▁▁▁▁
10 35.9 ▇▂▁▁▁
11 37.6 ▂▇▅▂▁
12 26.5 ▂▇▃▂▁
13 49.4 ▇▅▂▁▁
14 12.5 ▇▁▁▁▁
15 13.4 ▇▂▁▁▁
16 66.7 ▇▆▁▁▁
17 45.1 ▂▇▃▁▁
18 11.4 ▇▇▂▁▁
19 5.6 ▇▅▂▁▁
20 15 ▇▅▁▁▁
21 2.71 ▁▇▁▁▁
22 68.7 ▁▇▁▁▁
23 6.8 ▁▁▂▇▁
24 54.4 ▁▁▇▁▁
25 8.6 ▇▃▁▁▁
# Scatter plots of salary against each remaining column, one facet per
# variable, each with a straight-line least-squares fit (no confidence band).
raw_data %>%
  # Drop the three factor columns so only numeric columns remain.
  select(-c(player, nba_country, tm)) %>%
  # Long format: `salary` stays as its own column, every other column becomes
  # an (id, value) pair. Selecting by name avoids the positional `2:25`.
  tidyr::pivot_longer(cols = -salary, names_to = "id", values_to = "value") %>%
  ggplot(aes(x = value, y = salary)) +
  geom_point() +
  # Explicit formula is the same fit as the default for method = "lm".
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black") +
  # Independent x axes because the variables are on very different scales.
  facet_wrap(~id, ncol = 2, scales = "free_x")
# Same faceted scatter plots as for raw salary, but with the natural log of
# salary on the y axis.
raw_data %>%
  # Drop the three factor columns so only numeric columns remain.
  select(-c(player, nba_country, tm)) %>%
  # Long format: `salary` stays as its own column, every other column becomes
  # an (id, value) pair. Selecting by name avoids the positional `2:25`.
  tidyr::pivot_longer(cols = -salary, names_to = "id", values_to = "value") %>%
  ggplot(aes(x = value, y = log(salary))) +
  geom_point() +
  # Explicit formula is the same fit as the default for method = "lm".
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black") +
  # Independent x axes because the variables are on very different scales.
  facet_wrap(~id, ncol = 2, scales = "free_x")
── Data Summary ────────────────────────
Values
Name log_data
Number of rows 481
Number of columns 28
_______________________
Column type frequency:
factor 3
numeric 25
________________________
Group variables None
── Variable type: factor ────────────────────────────────────────
skim_variable n_missing complete_rate ordered n_unique
1 player 0 1 FALSE 481
2 nba_country 0 1 FALSE 44
3 tm 0 1 FALSE 31
top_counts
1 Aar: 1, Aar: 1, Aar: 1, Abd: 1
2 USA: 370, Can: 12, Fra: 9, Aus: 8
3 TOT: 54, DAL: 18, MEM: 17, UTA: 17
── Variable type: numeric ───────────────────────────────────────
skim_variable n_missing complete_rate mean sd
1 salary 0 1 15.0 1.49
2 nba_draft_number 0 1 29.3 21.1
3 age 0 1 26.3 4.27
4 g 0 1 50.5 24.7
5 mp 0 1 1163. 809.
6 per 0 1 13.4 8.74
7 ts 0 1 0.536 0.112
8 x3p_ar 0 1 0.338 0.227
9 f_tr 0 1 0.264 0.295
10 orb 0 1 4.91 4.58
11 drb 0 1 15.0 6.80
12 trb 0 1 9.97 4.93
13 ast 0 1 13.0 9.09
14 stl 0 1 1.54 0.988
15 blk 0 1 1.72 1.69
16 tov 0 1 13.1 6.12
17 usg 0 1 18.9 5.81
18 ows 0 1 1.29 1.88
19 dws 0 1 1.19 1.03
20 ws 0 1 2.48 2.67
21 ws_48 0 1 0.0814 0.163
22 obpm 0 1 -1.22 5.02
23 dbpm 0 1 -0.477 2.39
24 bpm 0 1 -1.7 5.64
25 vorp 0 1 0.605 1.25
p0 p25 p50 p75 p100 hist
1 10.7 14.2 15.0 16.1 17.4 ▂▁▇▆▆
2 1 10 24 47 62 ▇▅▃▃▆
3 19 23 26 29 41 ▇▇▆▂▁
4 1 30 59 71 79 ▃▂▂▃▇
5 1 391 1155 1830 2898 ▇▅▆▅▂
6 -41.1 9.9 13.3 16.6 134. ▁▇▁▁▁
7 0 0.506 0.545 0.583 1.5 ▁▇▂▁▁
8 0 0.167 0.346 0.482 1 ▇▇▇▂▁
9 0 0.155 0.231 0.32 5.33 ▇▁▁▁▁
10 0 1.8 3.3 7.1 35.9 ▇▂▁▁▁
11 0 10.3 14 18.8 37.6 ▂▇▅▂▁
12 0 6.2 8.7 13.3 26.5 ▂▇▃▂▁
13 0 6.9 9.9 17.2 49.4 ▇▅▂▁▁
14 0 1 1.5 1.9 12.5 ▇▁▁▁▁
15 0 0.6 1.2 2.2 13.4 ▇▂▁▁▁
16 0 9.9 12.5 15.6 66.7 ▇▆▁▁▁
17 5.7 15 17.9 22.2 45.1 ▂▇▃▁▁
18 -2.3 0 0.8 2 11.4 ▇▇▂▁▁
19 0 0.3 1 1.8 5.6 ▇▅▂▁▁
20 -1.2 0.4 1.9 3.6 15 ▇▅▁▁▁
21 -1.06 0.042 0.083 0.123 2.71 ▁▇▁▁▁
22 -36.5 -2.6 -1 0.4 68.7 ▁▇▁▁▁
23 -14.3 -1.6 -0.4 1 6.8 ▁▁▂▇▁
24 -49.2 -3.5 -1.2 0.6 54.4 ▁▁▇▁▁
25 -1.3 -0.1 0.1 0.9 8.6 ▇▃▁▁▁
# Excluded vars (factor)
# The name `vars` is kept as-is because the later filtering step
# (`nba <- log_data %>% ...`) reads it.
vars <- c("player", "nba_country", "tm")

# Correlation matrix of the non-excluded columns, computed once and shared by
# both plots. `all_of()` marks `vars` as an external character vector of
# column names rather than a column of `log_data`.
cor_matrix <- log_data %>%
  select(-all_of(vars)) %>%
  cor(use = "complete.obs")

# Correlations
corrplot(cor_matrix, method = "circle", type = "upper")

# Other Correlations
ggcorrplot(cor_matrix, hc.order = TRUE, type = "lower", lab = TRUE)

# Variance inflation factors from a linear model of salary on every column
# except the three factor columns.
model_vif <- lm(salary ~ . - player - nba_country - tm, data = log_data)
vif_values <- car::vif(model_vif)
# create horizontal bar chart to display each VIF value
barplot(vif_values, main = "VIF Values", horiz = TRUE, col = "steelblue")
# add vertical line at 5
abline(v = 5, lwd = 3, lty = 2)
| x | |
|---|---|
| nba_draft_number | 1.340170 |
| age | 1.078945 |
| g | 6.999197 |
| mp | 14.172245 |
| per | 110.918970 |
| ts | 6.146914 |
| x3p_ar | 5.301979 |
| f_tr | 1.264400 |
| orb | 317.236811 |
| drb | 684.388198 |
| trb | 1439.666086 |
| ast | 3.445392 |
| stl | 3.208627 |
| blk | 5.305430 |
| tov | 1.917591 |
| usg | 6.861721 |
| ows | 1329.661494 |
| dws | 405.100887 |
| ws | 2683.730741 |
| ws_48 | 67.930181 |
| obpm | 10524.307876 |
| dbpm | 2307.119717 |
| bpm | 12928.849699 |
| vorp | 11.551583 |
Variable endógena:

- Salario: log

Variables exógenas:

- Edad (Age): se presupone que a mayor edad mayor salario
- Edad elevada al cuadrado: considero que a partir de cierta edad ya no aumenta el salario con la edad
- Número del draft (NBA_DraftNumber): a menor número en el draft mayor salario
- Minutos jugados (MP): a mayor número de minutos jugados mayor salario
- Minutos jugados al cuadrado: a partir de un cierto número de minutos jugados ya no aumenta el salario
- Eficiencia del jugador: a mayor eficiencia mayor salario
- Eficiencia del jugador al cuadrado: a partir de cierto nivel de eficiencia ya no afecta al salario
- Contribución a las victorias del equipo: a mayor contribución a las victorias del equipo mayor salario
- Contribución a las victorias del equipo al cuadrado: a partir de cierto nivel de aportación a las victorias del equipo ya no afecta al salario
- Porcentaje de participación en el juego (USG%): a mayor participación mayor salario
- Valor sobre jugador de reemplazo (VORP): a mayor VORP mayor salario
- Valor sobre jugador de reemplazo al cuadrado: a partir de cierto nivel de VORP ya no afecta al salario
- Efectividad de tiro (TS%): a mayor efectividad de tiro mayor salario
- Efectividad de asistencias (AST%): a mayor efectividad de asistencias mayor salario
- Interacción de WS y VORP (WS:VORP): considero que están relacionadas estas dos variables, a mayores valores de WS y VORP mayor será el salario del jugador

A continuación se filtra la base de datos para poder observar sólo las variables que me interesan.
# Modelling table: `log_data` without the columns named in `vars`.
# `all_of()` marks `vars` as an external character vector of column names.
nba <- log_data %>% select(-all_of(vars))

# Reproducible train/test split: 10 rows are held out for testing and the
# remaining rows are used for training.
set.seed(1234)
num_data <- nrow(nba)
num_data_test <- 10
train <- sample(num_data, num_data - num_data_test)
data_train <- nba[train, ]
data_test <- nba[-train, ]

# Subset selection on the training rows with the "seqrep" search method,
# considering models with up to 24 predictors.
model_select <- regsubsets(
  salary ~ .,
  data = data_train,
  method = "seqrep",
  nvmax = 24
)
model_select_summary <- summary(model_select)

# One row per model size with its adjusted R2, Cp and BIC.
data.frame(
  Adj.R2 = model_select_summary$adjr2,
  CP = model_select_summary$cp,
  BIC = model_select_summary$bic
)
nba_draft_number age g mp per ts x3p_ar f_tr orb
1 ( 1 ) " " " " " " "*" " " " " " " " " " "
2 ( 1 ) "*" "*" " " " " " " " " " " " " " "
3 ( 1 ) "*" "*" "*" " " " " " " " " " " " "
4 ( 1 ) "*" "*" " " "*" " " " " " " " " " "
5 ( 1 ) "*" "*" " " "*" " " " " " " " " " "
6 ( 1 ) "*" "*" " " "*" " " " " " " " " " "
7 ( 1 ) "*" "*" " " "*" "*" "*" " " " " " "
8 ( 1 ) "*" "*" " " "*" "*" "*" " " " " " "
9 ( 1 ) "*" "*" " " "*" "*" "*" " " " " " "
10 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*"
11 ( 1 ) "*" "*" " " "*" "*" "*" "*" " " " "
12 ( 1 ) "*" "*" " " "*" "*" "*" "*" "*" " "
13 ( 1 ) "*" "*" " " "*" "*" "*" " " "*" " "
14 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" " "
15 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*"
16 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" " "
17 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" " "
18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*"
19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" " "
20 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*"
21 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*"
22 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*"
23 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*"
24 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*"
drb trb ast stl blk tov usg ows dws ws ws_48 obpm
1 ( 1 ) " " " " " " " " " " " " " " " " " " " " " " " "
2 ( 1 ) " " " " " " " " " " " " " " " " " " " " " " " "
3 ( 1 ) " " " " " " " " " " " " " " " " " " " " " " " "
4 ( 1 ) "*" " " " " " " " " " " " " " " " " " " " " " "
5 ( 1 ) "*" " " " " " " " " "*" " " " " " " " " " " " "
6 ( 1 ) "*" " " " " " " " " "*" " " " " " " " " " " " "
7 ( 1 ) "*" " " " " " " " " "*" " " " " " " " " " " " "
8 ( 1 ) "*" " " " " " " " " " " "*" " " " " " " " " " "
9 ( 1 ) " " "*" " " " " " " "*" "*" " " " " " " " " " "
10 ( 1 ) "*" " " " " " " " " " " " " " " " " " " " " " "
11 ( 1 ) "*" " " " " " " " " "*" "*" " " " " " " "*" " "
12 ( 1 ) "*" " " " " " " " " "*" "*" " " " " " " "*" " "
13 ( 1 ) " " "*" "*" " " " " "*" "*" " " "*" " " "*" " "
14 ( 1 ) " " "*" "*" " " " " "*" "*" " " "*" " " "*" " "
15 ( 1 ) "*" "*" "*" "*" "*" "*" " " " " " " " " " " " "
16 ( 1 ) " " "*" "*" " " " " "*" "*" " " "*" " " "*" " "
17 ( 1 ) " " "*" "*" "*" " " "*" "*" " " "*" " " "*" " "
18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" " " " " " "
19 ( 1 ) " " "*" "*" "*" " " "*" "*" " " "*" " " "*" "*"
20 ( 1 ) " " "*" "*" "*" " " "*" "*" " " "*" " " "*" "*"
21 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
22 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
23 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
24 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
dbpm bpm vorp
1 ( 1 ) " " " " " "
2 ( 1 ) " " " " " "
3 ( 1 ) " " " " " "
4 ( 1 ) " " " " " "
5 ( 1 ) " " " " " "
6 ( 1 ) "*" " " " "
7 ( 1 ) " " " " " "
8 ( 1 ) " " "*" " "
9 ( 1 ) " " "*" " "
10 ( 1 ) " " " " " "
11 ( 1 ) " " "*" " "
12 ( 1 ) " " "*" " "
13 ( 1 ) "*" " " " "
14 ( 1 ) "*" " " " "
15 ( 1 ) " " " " " "
16 ( 1 ) "*" " " "*"
17 ( 1 ) "*" " " "*"
18 ( 1 ) " " " " " "
19 ( 1 ) "*" "*" "*"
20 ( 1 ) "*" "*" "*"
21 ( 1 ) " " " " " "
22 ( 1 ) "*" " " " "
23 ( 1 ) "*" "*" " "
24 ( 1 ) "*" "*" "*"
# Position (model size) of the best model under each criterion: the largest
# adjusted R2, the smallest Cp and the smallest BIC.
data.frame(
  Adj.R2 = which.max(model_select_summary[["adjr2"]]),
  CP = which.min(model_select_summary[["cp"]]),
  BIC = which.min(model_select_summary[["bic"]])
)
(Intercept) mp
13.686516293 0.001081436
(Intercept) nba_draft_number age
10.2775337129 -0.0228320762 0.1019896961
mp per ts
0.0008949959 -0.1522594980 3.0138547540
f_tr trb ast
-0.1938126244 0.0646079523 0.0153101029
tov usg dws
-0.0206763451 0.0716940853 -0.1982512510
ws_48 dbpm
5.3625083897 0.0973437301
(Intercept) nba_draft_number age
11.6105898048 -0.0234964720 0.1038514895
mp drb
0.0007929329 0.0246685804
“All models are wrong, some models are useful”, Box, G.E.P
Call:
lm(formula = salary ~ mp, data = data_train)
Residuals:
Min 1Q Median 3Q Max
-3.1631 -0.7202 0.1293 0.7470 3.5157
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.369e+01 9.861e-02 138.79 <2e-16 ***
mp 1.081e-03 6.962e-05 15.53 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.217 on 469 degrees of freedom
Multiple R-squared: 0.3397, Adjusted R-squared: 0.3383
F-statistic: 241.3 on 1 and 469 DF, p-value: < 2.2e-16
# CP model
# Linear regression of salary on 13 predictors, fitted on the training rows,
# followed by its coefficient table and fit statistics.
nba_cp <- lm(
  salary ~ nba_draft_number + age + mp + per + ts + f_tr + trb + ast + tov +
    usg + dws + ws_48 + dbpm,
  data = data_train
)
summary(nba_cp)
Call:
lm(formula = salary ~ nba_draft_number + age + mp + per + ts +
f_tr + trb + ast + tov + usg + dws + ws_48 + dbpm, data = data_train)
Residuals:
Min 1Q Median 3Q Max
-3.6082 -0.5492 0.0161 0.6157 3.4353
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 10.2775337 0.5171307 19.874 < 2e-16 ***
nba_draft_number -0.0228321 0.0024638 -9.267 < 2e-16 ***
age 0.1019897 0.0111692 9.131 < 2e-16 ***
mp 0.0008950 0.0001223 7.316 1.15e-12 ***
per -0.1522595 0.0381420 -3.992 7.63e-05 ***
ts 3.0138548 0.8052136 3.743 0.000205 ***
f_tr -0.1938126 0.1671665 -1.159 0.246899
trb 0.0646080 0.0155087 4.166 3.71e-05 ***
ast 0.0153101 0.0074751 2.048 0.041115 *
tov -0.0206763 0.0092287 -2.240 0.025542 *
usg 0.0716941 0.0197169 3.636 0.000308 ***
dws -0.1982513 0.1082441 -1.832 0.067674 .
ws_48 5.3625084 1.6964142 3.161 0.001676 **
dbpm 0.0973437 0.0333722 2.917 0.003709 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.008 on 457 degrees of freedom
Multiple R-squared: 0.5589, Adjusted R-squared: 0.5464
F-statistic: 44.54 on 13 and 457 DF, p-value: < 2.2e-16
Call:
lm(formula = salary ~ nba_draft_number + age + mp + drb, data = data_train)
Residuals:
Min 1Q Median 3Q Max
-3.6240 -0.5413 0.0400 0.6180 3.1151
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.161e+01 3.349e-01 34.671 < 2e-16 ***
nba_draft_number -2.350e-02 2.442e-03 -9.622 < 2e-16 ***
age 1.039e-01 1.116e-02 9.309 < 2e-16 ***
mp 7.929e-04 6.325e-05 12.537 < 2e-16 ***
drb 2.467e-02 7.088e-03 3.481 0.000547 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.025 on 466 degrees of freedom
Multiple R-squared: 0.5353, Adjusted R-squared: 0.5313
F-statistic: 134.2 on 4 and 466 DF, p-value: < 2.2e-16
# Prediction
# adjR2
# Predict salary for the held-out test rows with the `nba_r2` model
# (defined outside this chunk).
predict_r2 <- predict(nba_r2,newdata = data_test)
# Put the predictions side by side with the observed salary of the test rows.
# NOTE(review): the trailing "predict_r2" on the next line looks like the
# printed column header fused onto the call when the document was rendered;
# confirm and remove it before running this line.
cbind(predict_r2,data_test$salary) predict_r2
16 14.66197 13.86430
47 15.34652 14.91412
71 13.82386 16.31994
99 14.40892 14.20171
147 14.91286 15.60727
199 16.29926 16.88175
212 16.77618 16.46169
232 13.80115 14.23705
281 14.51057 14.66080
326 13.94714 14.23705
predict_r2
16 2331380.6 1050000
47 4622840.5 3000000
71 1008383.0 12236535
99 1810140.9 1471382
147 2996226.5 6000000
199 11986177.1 21461010
212 19310851.0 14100000
232 985740.5 1524305
281 2003828.6 2328652
326 1140688.1 1524305
[1] 0.8313601
[1] 0.9117895
predict_cp
16 14.17898 13.86430
47 15.09042 14.91412
71 14.09767 16.31994
99 14.70682 14.20171
147 14.54377 15.60727
199 17.01606 16.88175
212 16.01029 16.46169
232 13.72628 14.23705
281 16.06819 14.66080
326 13.51050 14.23705
predict_cp
16 1438315.1 1050000
47 3578382.0 3000000
71 1325989.8 12236535
99 2438314.3 1471382
147 2071465.4 6000000
199 24545975.0 21461010
212 8978042.9 14100000
232 914638.0 1524305
281 9513178.4 2328652
326 737118.4 1524305
[1] 0.9446033
[1] 0.971907
predict_bic
16 14.23011 13.86430
47 15.04735 14.91412
71 14.17690 16.31994
99 14.30604 14.20171
147 14.42756 15.60727
199 17.22440 16.88175
212 16.03474 16.46169
232 13.84160 14.23705
281 16.29425 14.66080
326 13.56148 14.23705
predict_bic
16 1513757.0 1050000
47 3427521.0 3000000
71 1435326.0 12236535
99 1633187.3 1471382
147 1844205.7 6000000
199 30231746.6 21461010
212 9200219.0 14100000
232 1026431.3 1524305
281 11926165.3 2328652
326 775667.8 1524305
[1] 0.9727375
[1] 0.9862745